# RUN 04_gerry.R first!

# efficiency gap ----
run_eff_gap <- FALSE

if (run_eff_gap) {
  d_baseline <- read_csv(here("data/precinct_baseline.csv"), show_col_types = FALSE)
  model <- read_rds(here("data/election_model.rds"))
  d_control <- read_csv(here("data/control.csv"), show_col_types = FALSE)

  d_st_base <- d_baseline |>
    group_by(state) |>
    summarize(baseline = log(sum(ndv)) - log(sum(nrv)))

  natl <- lapply(unique(d_baseline$state), function(abb) {
    v <- d_ndists |>
      mutate(pre_dists = lag(cumsum(ndists), default = 0)) |>
      filter(state == abb) |>
      pull(pre_dists)
    get_plans_matrix(load_50state_plans(abb)) + v
  }) |>
    do.call("rbind", args = _)

  # egaps ----
  # gauss-hermite weights, scaled appropriately
  gh = lme4::GHrule(6, asMatrix=FALSE) |>
    mutate(z = z * model$natl_sd)
  # calc egap by state-draw
  d_egap = map_dfr(cli_progress_along(state.abb), function(i) {
    abbr = state.abb[i]
    d_st = filter(d_baseline, state == abbr)
    plans = load_50state_plans(abbr)
    ndists = attr(plans, "ndists")

    ldvs = part_dvs(plans, d_st, ndv, nrv) |>
      qlogis()
    seats = outer(ldvs, gh$z, `+`) |>
      model$prob_win_logit() |>
      rowWeightedMeans(w=gh$w) |>
      matrix(ncol=5001) |>
      colSums()
    dvs = with(d_st, sum(ndv) / (sum(ndv) + sum(nrv)))

    egaps <- 2*dvs - seats/ndists - 0.5

    tibble(state = rep(abbr, 5000),
           egap_enac = rep(egaps[1], 5000),
           egap = egaps[-1],
           draw = 1:5000)
  }) |>
    left_join(d_control, by="state") |>
    left_join(d_st_base, by="state")


  dvs_natl <- with(d_baseline, sum(ndv) / (sum(ndv) + sum(nrv)))
  d_natl_egap = group_by(d_seats, draw) |>
    summarize(label = "NATIONAL",
              egap_enac = 2*dvs_natl - sum(seats_enac)/435 - 0.5,
              egap = 2*dvs_natl - sum(seats)/435 - 0.5,
              diff = -1000,
              control = NA_character_)


  d_egap |>
    filter(!state %in% c("AK", "DE", "ND", "SD", "VT", "WY")) |>
    mutate(control = factor(recode_control[control], levels=recode_control),
           diff = egap_enac - egap) |>
    left_join(ap_abbr, by="state") |>
    left_join(d_ndists, by="state") |>
    mutate(label = str_glue("{ap} ({ndists})")) |>
    bind_rows(d_natl_egap) |>
  ggplot(aes(x = egap_enac - egap, y = reorder(label, -diff, FUN = median), color = control)) +
    geom_vline(xintercept=0, lwd=0.4) +
    stat_pointinterval(.width=0.0, fatten_point=0) +
    annotate("rect", fill="#00000011", xmin=-Inf, xmax=Inf, ymin=44.5, ymax=Inf) +
    stat_pointinterval(interval_size_range=c(0.8, 2.0), fatten_point=1.0) +
    scale_color_manual(name="Control of redistricting", values=PAL_control, na.value="black") +
    scale_x_continuous("Difference in efficiency gap under enacted plan\ncompared to non-partisan baseline", expand=c(0, 0),
                       label = label_egap, breaks = seq(-.2, .25, by = 0.05)) +
    labs(y="← Favors Republicans          States, ordered by efficiency gap difference          Favors Democrats →") +
    coord_cartesian(xlim=c(-0.16, 0.21)) +
    # coord_cartesian(xlim=c(-0.075, 0.125)) +
    theme_bw(base_size=8, base_family="Arial") +
    theme(legend.position=c(0.97, 0.95),
          legend.justification=c(1, 1),
          legend.background=element_blank(),
          legend.key=element_blank(),
          legend.key.height=unit(15, "pt"),
          legend.title=element_text(size=8, face="bold"),
          legend.text=element_text(size=8),
          axis.text.y=element_text(face="bold", color="black"))

  ggsave(here("paper/figures/state_egap.pdf"), height=5.5, width=5, device=cairo_pdf)

}
